dyn.load("/home/moyra.lawrence/miniconda3/envs/monocle3/lib/libgeos_c.so.1")
library(Seurat)
## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.11.0-CAPI-1.17.0
## and GEOS at installation 3.8.0-CAPI-1.13.1differ
library(patchwork)
library(ggplot2)
library(gridExtra)
library(openxlsx)
library(svglite)
library(dplyr)
library(gt)
library(tidyverse)
library(stringi)
#Loading embryo data and preparing for seurat
raw_data <- data.table::fread("/home/moyra.lawrence/R_analysis/Second_10x_run/GSE45719_matrix.txt.gz", data.table = FALSE)
names(raw_data)[1] <- 'Gene'
names <- make.unique(raw_data$Gene)
rownames(raw_data) <- names
raw_data <- raw_data[,-1] # get rid of old names
ML11 <- CreateSeuratObject(counts = raw_data, project = "ML11")
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
VlnPlot(ML11, pt.size=0, features = c("nFeature_RNA", "nCount_RNA"), ncol = 3)
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
VlnPlot(ML11, pt.size=0, features = c("nFeature_RNA", "nCount_RNA"), ncol = 3, log=TRUE)
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
FeatureScatter(ML11, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
#Quick investigation of markers
ML11 <- FindVariableFeatures(ML11, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(ML11), 10)
plot1 <- VariableFeaturePlot(ML11)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1133 rows containing missing values (geom_point).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
### Scaling
all.genes <- rownames(ML11)
ML11 <- ScaleData(ML11, features = all.genes)
scale.factor = 10000
ML11 <- NormalizeData(ML11, normalization.method = "LogNormalize", scale.factor = 10000)
ML11 <- RunPCA(ML11, npcs=100, features = VariableFeatures(object = ML11))
print(ML11[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(ML11, dims = 1:6, reduction = "pca")
DimPlot(ML11, reduction = "pca")
DimHeatmap(ML11, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(ML11, ndims=100)
## UMAP resolution = 0.7 for ML11
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
ML11 <- FindNeighbors(ML11, dims = 1:50)
ML11 <- FindClusters(ML11, resolution = 1)
ML11 <- RunUMAP(ML11, dims = 1:50)
DimPlot(ML11, reduction = "umap")
tbl_cell <- table(ML11@active.ident, ML11@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add
## 16cell 4cell 8cell BXC C57twocell early2cell earlyblast
## 1 23 (39.7%) 14 (100%) 24 (51.1%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
## 2 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 18 (41.9%)
## 3 0 (0%) 0 (0%) 1 (2.1%) 0 (0%) 0 (0%) 0 (0%) 21 (48.8%)
## 4 1 (1.7%) 0 (0%) 0 (0%) 3 (23.1%) 8 (100%) 8 (100%) 0 (0%)
## 5 26 (44.8%) 0 (0%) 14 (29.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
## 6 8 (13.8%) 0 (0%) 8 (17%) 10 (76.9%) 0 (0%) 0 (0%) 4 (9.3%)
## fibroblast late2cell lateblast mid2cell midblast zy1 zy2
## 1 0 (0%) 0 (0%) 0 (0%) 3 (25%) 0 (0%) 0 (0%) 0 (0%)
## 2 5 (50%) 0 (0%) 18 (60%) 0 (0%) 22 (36.7%) 0 (0%) 0 (0%)
## 3 0 (0%) 0 (0%) 8 (26.7%) 0 (0%) 31 (51.7%) 0 (0%) 0 (0%)
## 4 5 (50%) 10 (100%) 0 (0%) 7 (58.3%) 0 (0%) 1 (100%) 1 (100%)
## 5 0 (0%) 0 (0%) 0 (0%) 2 (16.7%) 0 (0%) 0 (0%) 0 (0%)
## 6 0 (0%) 0 (0%) 4 (13.3%) 0 (0%) 7 (11.7%) 0 (0%) 0 (0%)
## zy3 zy4 rownames
## 1 0 (0%) 0 (0%) 0
## 2 0 (0%) 0 (0%) 1
## 3 0 (0%) 0 (0%) 2
## 4 1 (100%) 1 (100%) 3
## 5 0 (0%) 0 (0%) 4
## 6 0 (0%) 0 (0%) 5
mus.markers <- FindAllMarkers(ML11, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'ML11.celltype.allmarkers.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top10
## [1] "LOC100502936" "Bcl2l10" "E330034G19Rik" "Zscan5b"
## [5] "Gm1995" "Oas1d" "S100a4" "Tcl1"
## [9] "Zfp707" "Serpinf1"
save(ML11, file="GSE_dataset.rda")
load(file='GSE_dataset.rda')
#Plotting seurat totipotent cluster markers
load(file='GSE_dataset.rda')
seurat <- c("Cacna1s", "Cenpe", "Dppa2", "Dppa5a", "Dusp4", "Gm2056", "Gm5788", "Gm2035", "Gm5039", "Gm16368", "Gm21319", "Gm2016", "Gm2022", "Gm4027", "Gm5662", "Gm8300", "Eif4a2", "Fbxo34", "Nelfa", "Nub1", "Olfr374", "Platr31", "Polr2a", "B020004J07Rik", "Gm12800", "Gm12794", "BC080695", "Gm13119", "Rbbp6", "Rpl10l", "Rpl12", "Rpl39l", "Rplp0", "Rxra", "Sp110", "Sp140", "Ube2c", "Ube2s", "Ube2t", "Ube2w", "Uimc1", "Usp17la", "Usp17lc", "Usp17le", "Usp26", "Usp9y", "Ythdc1", "Zfp296", "Zfp352", "Zfp51", "Zfp516", "Zfp560", "Zfp809", "Zfp820", "Zfp936", "Zfp950", "Zfp980", "Zfp981", "Zfp988", "Zfp992", "Zscan4c", "Zscan4d", "Zscan4e", "Zscan4-ps1", "Zscan4-ps2", "Zscan4-ps3")
seurat2<- c("Kdm1a", "Hdac1", "Hdac2", "Rcor1", "Sumo2", "Zmym2", "Clec10a", "Zfp42", "Arg2")
levels(ML11$orig.ident)
## [1] "16cell" "4cell" "8cell" "BXC" "C57twocell"
## [6] "early2cell" "earlyblast" "fibroblast" "late2cell" "lateblast"
## [11] "mid2cell" "midblast" "zy1" "zy2" "zy3"
## [16] "zy4"
ML11$orig.ident<- factor(x = ML11$orig.ident, levels = c("zy1", "zy2", "zy3", "zy4", "C57twocell", "early2cell", "mid2cell", "late2cell", "4cell", "8cell", "16cell", "earlyblast", "midblast", "lateblast", "fibroblast", "BXC"))
DoHeatmap(ML11, features = seurat, group.by = "orig.ident") + theme(axis.text.y = element_text(size = 6))
## Warning in DoHeatmap(ML11, features = seurat, group.by = "orig.ident"): The
## following features were omitted as they were not found in the scale.data slot
## for the RNA assay: Zscan4-ps3, Zscan4-ps2, Zscan4-ps1, Zfp992, Zfp988, Zfp981,
## Zfp980, Zfp950, Usp17le, Usp17lc, Usp17la, Gm12800, Platr31, Nelfa, Gm21319,
## Gm16368, Gm2035, Gm5788, Gm2056
DoHeatmap(ML11, features = seurat2,group.by = "orig.ident") + theme(axis.text.y = element_text(size = 6))
raw_data <- data.table::fread("/home/moyra.lawrence/R_analysis/Second_10x_run/GSE45719_matrix.txt.gz", data.table = FALSE)
names(raw_data)[1] <- 'Gene'
names <- make.unique(raw_data$Gene)
rownames(raw_data) <- names
raw_data <- raw_data[,-1] # get rid of old names
colnames(raw_data) <- gsub("_", "-", colnames(raw_data))
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "_", replacement = "-")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy1", replacement = "zygote-1")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy2", replacement = "zygote-2")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy3", replacement = "zygote-3")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy4", replacement = "zygote-4")
ML11 <- CreateSeuratObject(counts=raw_data, project="ML11", names.delim="-")
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
ML11$orig.ident
## 16cell-1-10 16cell-1-11 16cell-1-12
## 16cell 16cell 16cell
## 16cell-1-13 16cell-1-14 16cell-1-15
## 16cell 16cell 16cell
## 16cell-1-2 16cell-1-3 16cell-1-4
## 16cell 16cell 16cell
## 16cell-1-5 16cell-1-6 16cell-1-7
## 16cell 16cell 16cell
## 16cell-1-8 16cell-1-9 16cell-4-1
## 16cell 16cell 16cell
## 16cell-4-10 16cell-4-11 16cell-4-2
## 16cell 16cell 16cell
## 16cell-4-3 16cell-4-4 16cell-4-5
## 16cell 16cell 16cell
## 16cell-4-6 16cell-4-7 16cell-4-8
## 16cell 16cell 16cell
## 16cell-4-9 16cell-5-1 16cell-5-10
## 16cell 16cell 16cell
## 16cell-5-11 16cell-5-12 16cell-5-13
## 16cell 16cell 16cell
## 16cell-5-2 16cell-5-3 16cell-5-4
## 16cell 16cell 16cell
## 16cell-5-5 16cell-5-6 16cell-5-7
## 16cell 16cell 16cell
## 16cell-5-8 16cell-5-9 16cell-6-1
## 16cell 16cell 16cell
## 16cell-6-10 16cell-6-11 16cell-6-12
## 16cell 16cell 16cell
## 16cell-6-2 16cell-6-3 16cell-6-4
## 16cell 16cell 16cell
## 16cell-6-5 16cell-6-6 16cell-6-7
## 16cell 16cell 16cell
## 16cell-6-8 16cell-6-9 4cell-1-1
## 16cell 16cell 4cell
## 4cell-1-2 4cell-1-4 4cell-2-1
## 4cell 4cell 4cell
## 4cell-2-2 4cell-2-3 4cell-2-4
## 4cell 4cell 4cell
## 4cell-3-1 4cell-3-3 4cell-3-4
## 4cell 4cell 4cell
## 4cell-4-1 4cell-4-2 4cell-4-3
## 4cell 4cell 4cell
## 4cell-4-4 8cell-1-1 8cell-1-2
## 4cell 8cell 8cell
## 8cell-1-4 8cell-1-5 8cell-1-6
## 8cell 8cell 8cell
## 8cell-1-7 8cell-1-8 8cell-2-1
## 8cell 8cell 8cell
## 8cell-2-2 8cell-2-3 8cell-2-4
## 8cell 8cell 8cell
## 8cell-2-6 8cell-2-7 8cell-2-8
## 8cell 8cell 8cell
## 8cell-5-1 8cell-5-2 8cell-5-3
## 8cell 8cell 8cell
## 8cell-5-4 8cell-5-6 8cell-5-7
## 8cell 8cell 8cell
## 8cell-5-8 8cell-8-1 8cell-8-2
## 8cell 8cell 8cell
## 8cell-8-3 8cell-8-4 8cell-8-6
## 8cell 8cell 8cell
## 8cell-8-7 8cell-8-8 BXC-100pg-liver-RNA-1
## 8cell 8cell BXC
## BXC-100pg-liver-RNA-2 BXC-10pg-liver-RNA-1 BXC-10pg-liver-RNA-2
## BXC BXC BXC
## BXC-1ng-liver-RNA-0r BXC-1ng-liver-RNA-1 BXC-30pg-liver-RNA-0r
## BXC BXC BXC
## BXC-30pg-liver-RNA-2 BXC-liver-cell-1 BXC-liver-cell-2
## BXC BXC BXC
## BXC-liver-cell-4 BXC-liver-cell-5 BXC-liver-cell-6
## BXC BXC BXC
## C57twocell-16-1 C57twocell-16-2 C57twocell-18-1
## C57twocell C57twocell C57twocell
## C57twocell-18-2 C57twocell-20-1 C57twocell-20-2
## C57twocell C57twocell C57twocell
## C57twocell-22-1 C57twocell-22-2 early2cell-0r-1
## C57twocell C57twocell early2cell
## early2cell-0r-2 early2cell-1-1 early2cell-1-2
## early2cell early2cell early2cell
## early2cell-2-1 early2cell-2-2 early2cell-3-1
## early2cell early2cell early2cell
## early2cell-3-2 earlyblast-2-1 earlyblast-2-10
## early2cell earlyblast earlyblast
## earlyblast-2-12 earlyblast-2-15 earlyblast-2-16
## earlyblast earlyblast earlyblast
## earlyblast-2-17 earlyblast-2-19 earlyblast-2-2
## earlyblast earlyblast earlyblast
## earlyblast-2-22 earlyblast-2-3 earlyblast-2-4
## earlyblast earlyblast earlyblast
## earlyblast-2-5 earlyblast-2-6 earlyblast-2-7
## earlyblast earlyblast earlyblast
## earlyblast-2-8 earlyblast-3-1 earlyblast-3-10
## earlyblast earlyblast earlyblast
## earlyblast-3-12 earlyblast-3-13 earlyblast-3-14
## earlyblast earlyblast earlyblast
## earlyblast-3-15 earlyblast-3-16 earlyblast-3-17
## earlyblast earlyblast earlyblast
## earlyblast-3-2 earlyblast-3-3 earlyblast-3-4
## earlyblast earlyblast earlyblast
## earlyblast-3-6 earlyblast-3-7 earlyblast-3-8
## earlyblast earlyblast earlyblast
## earlyblast-3-9 earlyblast-4-1 earlyblast-4-12
## earlyblast earlyblast earlyblast
## earlyblast-4-13 earlyblast-4-14 earlyblast-4-16
## earlyblast earlyblast earlyblast
## earlyblast-4-17 earlyblast-4-18 earlyblast-4-2
## earlyblast earlyblast earlyblast
## earlyblast-4-3 earlyblast-4-5 earlyblast-4-6
## earlyblast earlyblast earlyblast
## earlyblast-4-8 earlyblast-4-9 late2cell-5-1
## earlyblast earlyblast late2cell
## late2cell-5-2 late2cell-6-1 late2cell-6-2
## late2cell late2cell late2cell
## late2cell-7-1 late2cell-7-2 late2cell-8-1
## late2cell late2cell late2cell
## late2cell-8-2 late2cell-9-1 late2cell-9-2
## late2cell late2cell late2cell
## lateblast-1-10 lateblast-1-11 lateblast-1-13
## lateblast lateblast lateblast
## lateblast-1-14 lateblast-1-16 lateblast-1-19
## lateblast lateblast lateblast
## lateblast-1-2 lateblast-1-20 lateblast-1-21
## lateblast lateblast lateblast
## lateblast-1-23 lateblast-1-24 lateblast-1-26
## lateblast lateblast lateblast
## lateblast-1-27 lateblast-1-4 lateblast-1-5
## lateblast lateblast lateblast
## lateblast-1-6 lateblast-1-7 lateblast-1-8
## lateblast lateblast lateblast
## lateblast-1-9 lateblast-2-1 lateblast-2-12
## lateblast lateblast lateblast
## lateblast-2-14 lateblast-2-16 lateblast-2-17
## lateblast lateblast lateblast
## lateblast-2-2 lateblast-2-3 lateblast-2-5
## lateblast lateblast lateblast
## lateblast-2-7 lateblast-2-8 lateblast-2-9
## lateblast lateblast lateblast
## mid2cell-0r-1 mid2cell-0r-2 mid2cell-3-1
## mid2cell mid2cell mid2cell
## mid2cell-3-2 mid2cell-4-1 mid2cell-4-2
## mid2cell mid2cell mid2cell
## mid2cell-5-1 mid2cell-5-2 mid2cell-6-1
## mid2cell mid2cell mid2cell
## mid2cell-6-2 mid2cell-7-1 mid2cell-7-2
## mid2cell mid2cell mid2cell
## midblast-1-1 midblast-1-10 midblast-1-11
## midblast midblast midblast
## midblast-1-12 midblast-1-13 midblast-1-14
## midblast midblast midblast
## midblast-1-15 midblast-1-16 midblast-1-17
## midblast midblast midblast
## midblast-1-18 midblast-1-19 midblast-1-2
## midblast midblast midblast
## midblast-1-20 midblast-1-22 midblast-1-23
## midblast midblast midblast
## midblast-1-24 midblast-1-3 midblast-1-4
## midblast midblast midblast
## midblast-1-5 midblast-1-6 midblast-1-8
## midblast midblast midblast
## midblast-1-9 midblast-2-1 midblast-2-10
## midblast midblast midblast
## midblast-2-11 midblast-2-12 midblast-2-13
## midblast midblast midblast
## midblast-2-14 midblast-2-15 midblast-2-16
## midblast midblast midblast
## midblast-2-17 midblast-2-18 midblast-2-2
## midblast midblast midblast
## midblast-2-23 midblast-2-24 midblast-2-3
## midblast midblast midblast
## midblast-2-4 midblast-2-5 midblast-2-6
## midblast midblast midblast
## midblast-2-7 midblast-2-8 midblast-2-9
## midblast midblast midblast
## midblast-3-1 midblast-3-10 midblast-3-11
## midblast midblast midblast
## midblast-3-12 midblast-3-13 midblast-3-14
## midblast midblast midblast
## midblast-3-15 midblast-3-17 midblast-3-18
## midblast midblast midblast
## midblast-3-2 midblast-3-23 midblast-3-3
## midblast midblast midblast
## midblast-3-4 midblast-3-5 midblast-3-6
## midblast midblast midblast
## midblast-3-7 midblast-3-8 midblast-3-9
## midblast midblast midblast
## zygote-1 zygote-2 zygote-3
## zygote zygote zygote
## zygote-4 16cell-2pooled-split6a 16cell-2pooled-split6b
## zygote 16cell 16cell
## 16cell-2pooled-split8a 16cell-2pooled-split8b 16cell-split1a
## 16cell 16cell 16cell
## 16cell-split1b 16cell-split2a 16cell-split2b
## 16cell 16cell 16cell
## 8cell-12-1-smartseq2 8cell-12-2-smartseq2 8cell-12-3-smartseq2
## 8cell 8cell 8cell
## 8cell-12-4-smartseq2 8cell-13-1-smartseq2 8cell-14-1-smartseq2
## 8cell 8cell 8cell
## 8cell-14-2-smartseq2 8cell-14-3-smartseq2 8cell-14-4-smartseq2
## 8cell 8cell 8cell
## 8cell-2pooled-split5a 8cell-2pooled-split5b 8cell-2pooled-split7a
## 8cell 8cell 8cell
## 8cell-2pooled-split7b 8cell-2pooled-split9a 8cell-2pooled-split9b
## 8cell 8cell 8cell
## 8cell-split3a 8cell-split3b 8cell-split4a
## 8cell 8cell 8cell
## 8cell-split4b fibroblast-13-CxB fibroblast-14-CxB
## 8cell fibroblast fibroblast
## fibroblast-15-CxB fibroblast-16-CxB fibroblast-17-BxC
## fibroblast fibroblast fibroblast
## fibroblast-19-BxC fibroblast-20-BxC fibroblast-21-BxC
## fibroblast fibroblast fibroblast
## fibroblast-22-BxC fibroblast-9-CxB
## fibroblast fibroblast
## 13 Levels: 16cell 4cell 8cell BXC C57twocell early2cell ... zygote
ML11[["in.vivo"]] <- Idents(object = ML11)
ML11$orig.ident <- "ML11"
ML1.data <- Read10X(data.dir = "/home/moyra.lawrence/count/Empty_vector3/outs/filtered_feature_bc_matrix/")
ML2.data <- Read10X(data.dir = "/home/moyra.lawrence/count/Dppa2/outs/filtered_feature_bc_matrix/")
ML3.data <- Read10X(data.dir = "/home/moyra.lawrence/count/Gata3/outs/filtered_feature_bc_matrix/")
ML4.data <- Read10X(data.dir = "/home/moyra.lawrence/count/MERVL_VP64/outs/filtered_feature_bc_matrix/")
ML5.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Empty_puro/outs/filtered_feature_bc_matrix/")
ML6.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Empty_hygro/outs/filtered_feature_bc_matrix/")
ML7.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Dppa2_Gata3/outs/filtered_feature_bc_matrix/")
ML8.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Dppa2_MERVL/outs/filtered_feature_bc_matrix/")
ML9.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Gata3_MERVL/outs/filtered_feature_bc_matrix/")
ML10.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Dppa2_Gata3_MERVL/outs/filtered_feature_bc_matrix/")
ML1 <- CreateSeuratObject(counts = ML1.data, project = "ML1")
ML2 <- CreateSeuratObject(counts = ML2.data, project = "ML2")
ML3 <- CreateSeuratObject(counts = ML3.data, project = "ML3")
ML4 <- CreateSeuratObject(counts = ML4.data, project = "ML4")
ML5 <- CreateSeuratObject(counts = ML5.data, project = "ML5")
ML6 <- CreateSeuratObject(counts = ML6.data, project = "ML6")
ML7 <- CreateSeuratObject(counts = ML7.data, project = "ML7")
ML8 <- CreateSeuratObject(counts = ML8.data, project = "ML8")
ML9 <- CreateSeuratObject(counts = ML9.data, project = "ML9")
ML10 <- CreateSeuratObject(counts = ML10.data, project = "ML10")
ML1
## An object of class Seurat
## 32293 features across 4406 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML2
## An object of class Seurat
## 32293 features across 3977 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML3
## An object of class Seurat
## 32293 features across 4835 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML4
## An object of class Seurat
## 32293 features across 3501 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML5
## An object of class Seurat
## 32293 features across 4921 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML6
## An object of class Seurat
## 32293 features across 4179 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML7
## An object of class Seurat
## 32293 features across 5105 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML8
## An object of class Seurat
## 32293 features across 3117 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML9
## An object of class Seurat
## 32293 features across 4678 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML10
## An object of class Seurat
## 32293 features across 3210 samples within 1 assay
## Active assay: RNA (32293 features, 0 variable features)
ML1[["percent.mt"]] <- PercentageFeatureSet(ML1, pattern = "^mt-")
ML2[["percent.mt"]] <- PercentageFeatureSet(ML2, pattern = "^mt-")
ML3[["percent.mt"]] <- PercentageFeatureSet(ML3, pattern = "^mt-")
ML4[["percent.mt"]] <- PercentageFeatureSet(ML4, pattern = "^mt-")
ML5[["percent.mt"]] <- PercentageFeatureSet(ML5, pattern = "^mt-")
ML6[["percent.mt"]] <- PercentageFeatureSet(ML6, pattern = "^mt-")
ML7[["percent.mt"]] <- PercentageFeatureSet(ML7, pattern = "^mt-")
ML8[["percent.mt"]] <- PercentageFeatureSet(ML8, pattern = "^mt-")
ML9[["percent.mt"]] <- PercentageFeatureSet(ML9, pattern = "^mt-")
ML10[["percent.mt"]] <- PercentageFeatureSet(ML10, pattern = "^mt-")
3500 < nFeature_RNA
10,000 < nCount_RNA <100,000
0 =< percent.mt < 10
ML1.filtered <- subset(ML1, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML2.filtered <- subset(ML2, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML3.filtered <- subset(ML3, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML4.filtered <- subset(ML4, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML5.filtered <- subset(ML5, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML6.filtered <- subset(ML6, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML7.filtered <- subset(ML7, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML8.filtered <- subset(ML8, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML9.filtered <- subset(ML9, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML10.filtered <- subset(ML10, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
merge.filtered <- merge(ML1.filtered, y = c(ML2.filtered, ML3.filtered, ML4.filtered, ML5.filtered, ML6.filtered, ML7.filtered, ML8.filtered, ML9.filtered, ML10.filtered, ML11), add.cell.ids = c("Empty_hygro_1", "Dppa2", "Gata3", "MERVL", "Empty_puro", "Empty_hygro_2", "Dppa2_Gata3", "Dppa2_MERVL", "Gata3_MERVL", "Dppa2_Gata3_MERVL", "Embryo"), project = "ML1")
merge.filtered
## An object of class Seurat
## 36331 features across 34804 samples within 1 assay
## Active assay: RNA (36331 features, 0 variable features)
head(colnames(merge.filtered))
## [1] "Empty_hygro_1_AAACCCAAGGAAGTAG-1" "Empty_hygro_1_AAACCCAGTCGCACGT-1"
## [3] "Empty_hygro_1_AAACCCAGTGTGCTTA-1" "Empty_hygro_1_AAACCCATCTGGTCAA-1"
## [5] "Empty_hygro_1_AAACGAAAGTAACCGG-1" "Empty_hygro_1_AAACGAAGTCCTGGTG-1"
tail(colnames(merge.filtered))
## [1] "Embryo_fibroblast-17-BxC" "Embryo_fibroblast-19-BxC"
## [3] "Embryo_fibroblast-20-BxC" "Embryo_fibroblast-21-BxC"
## [5] "Embryo_fibroblast-22-BxC" "Embryo_fibroblast-9-CxB"
unique(sapply(X = strsplit(colnames(merge.filtered), split = "_"), FUN = "[", 1))
## [1] "Empty" "Dppa2" "Gata3" "MERVL" "Embryo"
table(merge.filtered$orig.ident)
##
## ML1 ML10 ML11 ML2 ML3 ML4 ML5 ML6 ML7 ML8 ML9
## 3664 2665 317 3153 3956 3047 4144 3565 4315 2090 3888
table(merge.filtered$in.vivo)
##
## 16cell 4cell 8cell BXC C57twocell early2cell earlyblast
## 58 14 47 13 8 8 43
## fibroblast late2cell lateblast mid2cell midblast zygote
## 10 10 30 12 60 4
vp1 <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA")) + theme(legend.position = 'none')
vp2 <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA")) + theme(legend.position = 'none')
vp3 <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt")) + theme(legend.position = 'none')
vp1.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA"),log=TRUE)+ theme(legend.position = 'none')
vp2.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA"),log=TRUE)+ theme(legend.position = 'none')
vp3.log <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt"),log=TRUE) + theme(legend.position = 'none')
grid.arrange(vp1,vp2,vp3, nrow=1)
grid.arrange(vp1.log,vp2.log,vp3.log, nrow=1)
FeatureScatter(merge.filtered, "nCount_RNA", "percent.mt", pt.size=0.5) + scale_y_continuous(limits=c(0,100)) + scale_x_continuous(limits=c(0,100000))
FeatureScatter(merge.filtered, "nCount_RNA", "nFeature_RNA", pt.size=0.5) + scale_y_continuous(limits=c(0,12000)) + scale_x_continuous(limits=c(0,100000))
merge.list <- SplitObject(merge.filtered, split.by = "orig.ident")
merge.list <- lapply(X = merge.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, verbose = FALSE)
})
features <- SelectIntegrationFeatures(object.list = merge.list)
merge.list <- lapply(X = merge.list, FUN = function(x) {
x <- ScaleData(x, features = features, verbose = FALSE)
x <- RunPCA(x, features = features, verbose = FALSE)
})
anchors <- FindIntegrationAnchors(object.list = merge.list, reference = c(1, 5, 6, 11), reduction = "rpca",
dims = 1:50 )
## Computing 2000 integration features
## Scaling features for provided objects
## Computing within dataset neighborhoods
## Finding anchors between all query and reference datasets
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2065 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 858 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1391 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1802 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1495 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1109 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1707 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1484 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1250 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1083 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1868 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2174 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1988 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2964 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2104 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1583 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2022 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1858 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1383 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2058 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1977 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1835 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2220 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 2095 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 201 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 167 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 175 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 164 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 177 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 165 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 187 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 176 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 192 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 160 anchors
totipotency <- IntegrateData(anchorset = anchors, dims = 1:50)
## Building integrated reference
## Merging dataset 11 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 6 into 5
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 11 into 5 6
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 2 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 3 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 4 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 7 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 8 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 9 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##
## Integrating dataset 10 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
#Save object
DefaultAssay(totipotency) <- "integrated"
save(totipotency, file="In_vivo_integrated3.rda")
#load(file='In_vivo_integrated3.rda')
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(totipotency), 10)
plot1 <- VariableFeaturePlot(totipotency)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2
### Scaling
all.genes <- rownames(totipotency)
totipotency <- ScaleData(totipotency, features = all.genes)
#Omitted Normalization as this is an integrated object
totipotency <- RunPCA(totipotency, npcs=100, features = rownames(totipotency))
print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)
## UMAP resolution = 0.6 for totipotency
dim(totipotency)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
totipotency <- FindNeighbors(totipotency, dims = 1:50)
totipotency <- FindClusters(totipotency, resolution = 0.6)
totipotency <- RunUMAP(totipotency, dims = 1:50)
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1
p2 <- DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p2
DefaultAssay(totipotency) <- "RNA"
totipotency <- NormalizeData(totipotency)
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst")
g2m_1.genes <- c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80","Cks2","Nuf2","Cks1b", "Mki67", "Tmpo","Cenpf","Tacc3","Fam64a","Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e","Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Hn1","Cdc20","Ttk","Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2","Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf","Nek2","G2e3","Gas2l3","Cbx5","Cenpa")
s_1.genes <- c("Mcm5","Pcna","Tyms","Fen1","Mcm2","Mcm4","Rrm1","Ung","Gins2","Mcm6","Cdca7","Dtl","Prim1","Uhrf1","Mlf1ip","Hells","Rfc2","Rpa2","Nasp","Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2","Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2","Usp1","Clspn","Pola1","Chaf1b","Brip1","E2f8")
totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
# view cell cycle scores and phase assignments
head(totipotency[[]])
## orig.ident nCount_RNA nFeature_RNA percent.mt
## Empty_hygro_1_AAACCCAAGGAAGTAG-1 ML1 20184 4191 4.231074
## Empty_hygro_1_AAACCCAGTCGCACGT-1 ML1 26366 5162 3.986194
## Empty_hygro_1_AAACCCAGTGTGCTTA-1 ML1 54111 7691 4.477833
## Empty_hygro_1_AAACCCATCTGGTCAA-1 ML1 28283 5743 4.009476
## Empty_hygro_1_AAACGAAAGTAACCGG-1 ML1 30028 5260 4.878780
## Empty_hygro_1_AAACGAAGTCCTGGTG-1 ML1 19053 4860 4.734163
## in.vivo integrated_snn_res.0.6 seurat_clusters
## Empty_hygro_1_AAACCCAAGGAAGTAG-1 <NA> 2 2
## Empty_hygro_1_AAACCCAGTCGCACGT-1 <NA> 2 2
## Empty_hygro_1_AAACCCAGTGTGCTTA-1 <NA> 9 9
## Empty_hygro_1_AAACCCATCTGGTCAA-1 <NA> 0 0
## Empty_hygro_1_AAACGAAAGTAACCGG-1 <NA> 0 0
## Empty_hygro_1_AAACGAAGTCCTGGTG-1 <NA> 1 1
## S.Score G2M.Score Phase old.ident
## Empty_hygro_1_AAACCCAAGGAAGTAG-1 -0.06194625 -0.306587988 G1 2
## Empty_hygro_1_AAACCCAGTCGCACGT-1 0.13928402 -0.313254895 S 2
## Empty_hygro_1_AAACCCAGTGTGCTTA-1 0.08786819 -0.210559821 S 9
## Empty_hygro_1_AAACCCATCTGGTCAA-1 -0.03951238 -0.109141726 G1 0
## Empty_hygro_1_AAACGAAAGTAACCGG-1 -0.19186304 -0.017738155 G1 0
## Empty_hygro_1_AAACGAAGTCCTGGTG-1 -0.17219763 -0.005161036 G1 1
DimPlot(totipotency, reduction = "umap")
## Scaling out cell cycle
DefaultAssay(totipotency) <- "integrated"
totipotency <- ScaleData(totipotency, vars.to.regress=c("S.Score","G2M.Score"))
## Regressing out S.Score, G2M.Score
## Centering and scaling data matrix
save(totipotency, file="In_vivo_cell_cycle_corrected3.rda")
#load(file='In_vivo_cell_cycle_corrected3.rda')
totipotency <- RunPCA(totipotency, npcs = 100, verbose = FALSE)
totipotency <- RunUMAP(totipotency, reduction = "pca", dims = 1:30)
## 18:26:40 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:26:40 Read 34804 rows and found 30 numeric columns
## 18:26:40 Using Annoy for neighbor search, n_neighbors = 30
## 18:26:40 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:26:44 Writing NN index file to temp file /tmp/RtmpIbyjqn/file2ddd5403e2bc0
## 18:26:44 Searching Annoy index using 1 thread, search_k = 3000
## 18:26:58 Annoy recall = 100%
## 18:26:58 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 18:27:01 Initializing from normalized Laplacian + noise (using irlba)
## 18:27:02 Commencing optimization for 200 epochs, with 1561036 positive edges
## 18:27:20 Optimization finished
totipotency <- FindNeighbors(totipotency, dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
totipotency <- FindClusters(totipotency, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 34804
## Number of edges: 1030551
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8245
## Number of communities: 17
## Elapsed time: 6 seconds
## 2 singletons identified. 15 final clusters.
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
# Integrated analysis ### Identify variable genes 2
tot <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10_2 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10_2, repel = TRUE, xnudge=0, ynudge=0)
plot2
# Plot PCA2
print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
## PC_ 1
## Positive: Exoc4, Dst, Ranbp17, Trip12, Srgap2
## Negative: Cdc20, Tubb4b, Ube2c, Ldha, Aprt
## PC_ 2
## Positive: Phlda3, Btg2, Sdc4, Plk2, Ptpn14
## Negative: Enox1, Adam23, Ntn1, Enah, Gli2
## PC_ 3
## Positive: Hist1h2ae, Ano3, Ddit4l, Ptp4a3, Trp53inp1
## Negative: Col4a2, Col4a1, Lamb1, Srgn, Dab2
## PC_ 4
## Positive: Svop, Phlda3, D630023F18Rik, Cdkn1a, Trp53inp1
## Negative: Arg2, Platr31, Wee2, Tcl1b4, Gm11487
## PC_ 5
## Positive: Hist1h2ae, Hist1h1b, Hist1h1e, Hist1h2ap, 2410006H16Rik
## Negative: Tcf15, Nanog, Tmsb4x, Nfib, Nr5a2
## PC_ 6
## Positive: Ptch1, Nrp2, Utrn, Map4k4, Rragd
## Negative: Gabarapl2, Mylpf, Tbx3, Ankrd33b, Sept1
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)
# Quantifying cells in new clusters
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Libraries per cluster")
| Libraries per cluster | |||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
| ML1 | ML10 | ML11 | ML2 | ML3 | ML4 | ML5 | ML6 | ML7 | ML8 | ML9 | |
| 0 | 1090 (29.7%) | 780 (29.3%) | 5 (1.6%) | 1000 (31.7%) | 647 (16.4%) | 811 (26.6%) | 975 (23.5%) | 951 (26.7%) | 1069 (24.8%) | 583 (27.9%) | 1043 (26.8%) |
| 1 | 869 (23.7%) | 585 (22%) | 11 (3.5%) | 757 (24%) | 496 (12.5%) | 716 (23.5%) | 883 (21.3%) | 841 (23.6%) | 903 (20.9%) | 432 (20.7%) | 812 (20.9%) |
| 2 | 416 (11.4%) | 283 (10.6%) | 2 (0.6%) | 383 (12.1%) | 350 (8.8%) | 346 (11.4%) | 397 (9.6%) | 293 (8.2%) | 510 (11.8%) | 225 (10.8%) | 546 (14%) |
| 3 | 391 (10.7%) | 369 (13.8%) | 6 (1.9%) | 291 (9.2%) | 301 (7.6%) | 341 (11.2%) | 397 (9.6%) | 452 (12.7%) | 473 (11%) | 257 (12.3%) | 404 (10.4%) |
| 4 | 165 (4.5%) | 185 (6.9%) | 1 (0.3%) | 167 (5.3%) | 245 (6.2%) | 309 (10.1%) | 255 (6.2%) | 323 (9.1%) | 242 (5.6%) | 133 (6.4%) | 316 (8.1%) |
| 5 | 186 (5.1%) | 98 (3.7%) | 0 (0%) | 85 (2.7%) | 40 (1%) | 127 (4.2%) | 761 (18.4%) | 250 (7%) | 435 (10.1%) | 82 (3.9%) | 76 (2%) |
| 6 | 143 (3.9%) | 180 (6.8%) | 0 (0%) | 108 (3.4%) | 179 (4.5%) | 172 (5.6%) | 291 (7%) | 282 (7.9%) | 235 (5.4%) | 125 (6%) | 211 (5.4%) |
| 7 | 15 (0.4%) | 6 (0.2%) | 117 (36.9%) | 7 (0.2%) | 1371 (34.7%) | 49 (1.6%) | 9 (0.2%) | 26 (0.7%) | 68 (1.6%) | 3 (0.1%) | 38 (1%) |
| 8 | 306 (8.4%) | 50 (1.9%) | 0 (0%) | 247 (7.8%) | 63 (1.6%) | 86 (2.8%) | 58 (1.4%) | 30 (0.8%) | 99 (2.3%) | 26 (1.2%) | 25 (0.6%) |
| 9 | 77 (2.1%) | 83 (3.1%) | 0 (0%) | 80 (2.5%) | 6 (0.2%) | 74 (2.4%) | 96 (2.3%) | 105 (2.9%) | 202 (4.7%) | 91 (4.4%) | 73 (1.9%) |
| 10 | 2 (0.1%) | 0 (0%) | 4 (1.3%) | 0 (0%) | 210 (5.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 16 (0.4%) | 0 (0%) | 293 (7.5%) |
| 11 | 1 (0%) | 44 (1.7%) | 42 (13.2%) | 28 (0.9%) | 42 (1.1%) | 14 (0.5%) | 3 (0.1%) | 1 (0%) | 59 (1.4%) | 106 (5.1%) | 46 (1.2%) |
| 12 | 0 (0%) | 0 (0%) | 118 (37.2%) | 0 (0%) | 4 (0.1%) | 2 (0.1%) | 1 (0%) | 1 (0%) | 2 (0%) | 0 (0%) | 2 (0.1%) |
| 13 | 3 (0.1%) | 2 (0.1%) | 3 (0.9%) | 0 (0%) | 2 (0.1%) | 0 (0%) | 18 (0.4%) | 10 (0.3%) | 2 (0%) | 27 (1.3%) | 3 (0.1%) |
| 14 | 0 (0%) | 0 (0%) | 8 (2.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Embryo cells per cluster")
| Embryo cells per cluster | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16cell | 4cell | 8cell | BXC | C57twocell | early2cell | earlyblast | fibroblast | late2cell | lateblast | mid2cell | midblast | zygote | |
| 0 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 1 (10%) | 0 (0%) | 1 (3.3%) | 0 (0%) | 3 (5%) | 0 (0%) |
| 1 | 0 (0%) | 0 (0%) | 0 (0%) | 5 (38.5%) | 0 (0%) | 0 (0%) | 4 (9.3%) | 1 (10%) | 0 (0%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 2 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 2 (3.3%) | 0 (0%) |
| 3 | 1 (1.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 1 (2.3%) | 4 (40%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 4 | 1 (1.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 5 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 6 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 7 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 35 (81.4%) | 4 (40%) | 0 (0%) | 24 (80%) | 0 (0%) | 54 (90%) | 0 (0%) |
| 8 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 9 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 10 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 4 (13.3%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 11 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 8 (100%) | 8 (100%) | 0 (0%) | 0 (0%) | 10 (100%) | 0 (0%) | 12 (100%) | 0 (0%) | 4 (100%) |
| 12 | 53 (91.4%) | 14 (100%) | 47 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 1 (1.7%) | 0 (0%) |
| 13 | 3 (5.2%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 14 | 0 (0%) | 0 (0%) | 0 (0%) | 8 (61.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
DefaultAssay(totipotency) <- "RNA"
mus.markers <- FindAllMarkers(totipotency, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'In_vivo_markers3.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)
totipotency <- ScaleData(totipotency, features = all.genes)
VlnPlot(totipotency, features = features_vec, pt.size=0)
#Heatmap
top10 <- mus.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(totipotency, features = top10$gene) + NoLegend() + theme(axis.text.y = element_text(size = 4.5))
## Warning in DoHeatmap(totipotency, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## Trf, Ahsg, Apoa2, Apoa1, Gc, Sepp1, Fgb, Serpina1b, Serpina3k, Alb, Hprt,
## Isyna1, Mdh2, Eno1, Ppia, Pdxk, Gpd1l, Gnb2l1, Gm11517, Tceb1, Alppl2, Timd2,
## Spin1, LOC100502936, H3f3b, Gas5, Mat2a, Dynll1, Cks2, Cotl1, Clspn, Usp37,
## Hells, Ung, Cdc6, Slbp, Rrm2, Dtl, Ftl1, Prdx1, Gsta4, H2afz
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p1
## Cell types from in vivo data
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "in.vivo")
p1
new.cluster.ids <- c("Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent","Pluripotent", "Pluripotent", "Blastocyst, fibroblast", "Pluripotent", "Pluripotent", "Primitive Endoderm", "Totipotent, 2C, zygote", "4.8.16 cell", "Pluripotent", "Liver")
names(new.cluster.ids) <- levels(totipotency)
totipotency <- RenameIdents(totipotency, new.cluster.ids)
DimPlot(totipotency, reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
| Cluster_distribution | |||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
| ML1 | ML10 | ML11 | ML2 | ML3 | ML4 | ML5 | ML6 | ML7 | ML8 | ML9 | |
| Pluripotent | 3646 (99.5%) | 2615 (98.1%) | 28 (8.8%) | 3118 (98.9%) | 2329 (58.9%) | 2982 (97.9%) | 4131 (99.7%) | 3537 (99.2%) | 4170 (96.6%) | 1981 (94.8%) | 3509 (90.3%) |
| Blastocyst, fibroblast | 15 (0.4%) | 6 (0.2%) | 117 (36.9%) | 7 (0.2%) | 1371 (34.7%) | 49 (1.6%) | 9 (0.2%) | 26 (0.7%) | 68 (1.6%) | 3 (0.1%) | 38 (1%) |
| Primitive Endoderm | 2 (0.1%) | 0 (0%) | 4 (1.3%) | 0 (0%) | 210 (5.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 16 (0.4%) | 0 (0%) | 293 (7.5%) |
| Totipotent, 2C, zygote | 1 (0%) | 44 (1.7%) | 42 (13.2%) | 28 (0.9%) | 42 (1.1%) | 14 (0.5%) | 3 (0.1%) | 1 (0%) | 59 (1.4%) | 106 (5.1%) | 46 (1.2%) |
| 4.8.16 cell | 0 (0%) | 0 (0%) | 118 (37.2%) | 0 (0%) | 4 (0.1%) | 2 (0.1%) | 1 (0%) | 1 (0%) | 2 (0%) | 0 (0%) | 2 (0.1%) |
| Liver | 0 (0%) | 0 (0%) | 8 (2.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
| Cluster_distribution | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16cell | 4cell | 8cell | BXC | C57twocell | early2cell | earlyblast | fibroblast | late2cell | lateblast | mid2cell | midblast | zygote | |
| Pluripotent | 5 (8.6%) | 0 (0%) | 0 (0%) | 5 (38.5%) | 0 (0%) | 0 (0%) | 5 (11.6%) | 6 (60%) | 0 (0%) | 2 (6.7%) | 0 (0%) | 5 (8.3%) | 0 (0%) |
| Blastocyst, fibroblast | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 35 (81.4%) | 4 (40%) | 0 (0%) | 24 (80%) | 0 (0%) | 54 (90%) | 0 (0%) |
| Primitive Endoderm | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 4 (13.3%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Totipotent, 2C, zygote | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 8 (100%) | 8 (100%) | 0 (0%) | 0 (0%) | 10 (100%) | 0 (0%) | 12 (100%) | 0 (0%) | 4 (100%) |
| 4.8.16 cell | 53 (91.4%) | 14 (100%) | 47 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 1 (1.7%) | 0 (0%) |
| Liver | 0 (0%) | 0 (0%) | 0 (0%) | 8 (61.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
DimPlot(totipotency, reduction = "umap", split.by = "orig.ident", ncol = 3)
save(totipotency, file="In_vivo_final3.rda")
DefaultAssay(totipotency) <- "RNA"
totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
DimPlot(totipotency, reduction = "umap")
#Cell cycle bar chart
totipotency@meta.data %>% ggplot(aes(x=old.ident,fill=Phase)) +
geom_bar(position = "fill",size = 3, width = .8) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x = "", y = "Cell fraction")
#Feature plot
FeaturePlot(totipotency, features = "Arg2")